1. VERTICAL BARCHART
  2. HORIZONTAL BARCHART
  3. STACKED BARCHART
  4. STACKED BARCHART (DODGED)
  5. STACKED BARCHART (ABSOLUTE)
  6. LINE CHART
  7. HEATMAP / TILE MAP
  8. BOXPLOT
  9. SCATTERPLOT
  10. ANIMATED SCATTERPLOT
  11. TREEMAP
  12. FACET WRAP
  13. FACET GRID

Load required Libraries and Datasets:

# Load the four CSV datasets used by the charts below. The paths are absolute
# (D:/datasets), so adjust them to wherever the files live on your machine.
# ODI batting records: animated scatterplot, treemap and facet charts.
odi_data <- read.csv("D:/datasets/odi-batting.csv")
# Tweet records: scatterplot of retweets vs. likes.
tweets_data <- read.csv("D:/datasets/narendramodi_tweets.csv")
# Election records: vertical, horizontal and gender-split bar charts.
election_data <- read.csv("D:/datasets/parliament.csv")
# Temperature readings: stacked/dodged bars, line chart, heat map and boxplot.
iot_data <- read.csv("D:/datasets/data_iot_temperature.csv")

library(dplyr)
library(lubridate)
library(ggplot2)
library(plotly)
library(treemap)

VERTICAL BARCHART

# The ten parties with the most first-place finishes across all years.
Top_10_Party_names <- election_data %>%
  filter(Position == 1) %>%
  group_by(PARTY) %>%
  summarise(WINS = n()) %>%
  arrange(desc(WINS)) %>%
  select(PARTY) %>%
  head(10)

# Number of contestants each of those parties fielded in 2009.
v_barchart <- election_data %>%
  filter(YEAR == 2009, PARTY %in% Top_10_Party_names$PARTY) %>%
  group_by(PARTY) %>%
  summarise(Total_contestants = n())

# Bars are ordered from most to fewest contestants; each bar is labelled with
# its count.
v_barchart <- ggplot(
  v_barchart,
  aes(reorder(PARTY, -Total_contestants), Total_contestants, fill = PARTY)
) +
  geom_col(width = 0.5) +
  xlab("PARTY") +
  ylab("No. of Contestants") +
  ggtitle("Partywise Total Contestants in 2009") +
  geom_text(position = "stack", aes(label = Total_contestants))

# Render as an interactive plotly widget.
ggplotly(v_barchart)

HORIZONTAL BARCHART

# For 2009, keep the rows with Position 1 or 2 in every constituency (PC) and
# compute the vote gap between consecutive rows once sorted by votes.
#   diff_votes  : votes of a row minus votes of the next row within the PC;
#                 the last row of each PC gets NA and is dropped by na.omit().
#   perc_margin : diff_votes as a percentage of the next row's votes
#                 (TotalVotes - diff_votes).
# The ten largest percentage margins are kept.
h_barchart <- election_data %>%
  filter(YEAR == 2009, Position %in% c(1, 2)) %>%
  group_by(PC, NAME) %>%
  summarise(TotalVotes = sum(VOTES)) %>%
  group_by(PC) %>%
  arrange(PC, desc(TotalVotes)) %>%
  mutate(diff_votes = c(-diff(TotalVotes), NA)) %>%
  na.omit() %>%
  mutate(perc_margin = diff_votes * 100 / (TotalVotes - diff_votes)) %>%
  arrange(desc(perc_margin)) %>%
  head(10)

# Horizontal bars via coord_flip(); the y-axis text is hidden and the
# contestant name is printed inside each bar instead.
h_barchart <- ggplot(
  h_barchart,
  aes(reorder(NAME, perc_margin), perc_margin, fill = NAME)
) +
  geom_bar(stat = "identity") +
  xlab("Contestant Name") +
  ylab("Percentage Margin") +
  coord_flip() +
  theme(
    legend.position = "none",
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  geom_text(
    position = position_stack(vjust = 0.8),
    aes(label = NAME),
    size = 2
  )

ggplotly(h_barchart)

STACKED BARCHART

# Count, per day of the month, the readings that fall outside the 25-30 band,
# split by which limit was breached.
# The label test uses the same boundary as the filter (> 30). The previous
# `>= 31` test labelled readings between 30 and 31 as "Under Lower Limit".
stacked <- iot_data %>%
  filter(temperature < 25 | temperature > 30) %>%
  mutate(
    Temp_Limit = if_else(
      temperature > 30,
      "Exceeds Upper Limit (30)",
      "Under Lower Limit (25)"
    )
  ) %>%
  group_by(day(date), Temp_Limit) %>%
  summarise(count = n())

# Rows are passed in descending Temp_Limit order; counts are printed in the
# middle of each stacked segment.
stacked <- ggplot(
  stacked[order(stacked$Temp_Limit, decreasing = TRUE), ],
  aes(`day(date)`, count, fill = Temp_Limit)
) +
  geom_bar(stat = "identity") +
  geom_text(
    position = position_stack(vjust = 0.5),
    aes(label = count),
    size = 2.5
  ) +
  xlab("Day of the Month") +
  ylab("Abnormal Temperature Count") +
  ggtitle("Occurences of Abnormal Temperature") +
  labs(fill = "") +
  theme(legend.position = "top") +
  scale_fill_manual(values = c("#445e87", "#dbdd49"))

stacked

DODGED BARCHART

# Same abnormal-reading counts as the stacked chart, drawn side by side.
# The label test uses the same boundary as the filter (> 30). The previous
# `>= 31` test labelled readings between 30 and 31 as "Under Lower Limit".
dodged <- iot_data %>%
  filter(temperature < 25 | temperature > 30) %>%
  mutate(
    Temp_Limit = if_else(
      temperature > 30,
      "Exceeds Upper Limit (30)",
      "Under Lower Limit (25)"
    )
  ) %>%
  group_by(day(date), Temp_Limit) %>%
  summarise(count = n())

# NOTE(review): width = 1.2 is wider than the spacing between consecutive
# days, so neighbouring groups may overlap - confirm this is intended.
dodged <- ggplot(dodged, aes(`day(date)`, count, fill = Temp_Limit)) +
  geom_col(width = 1.2, position = "dodge2") +
  xlab("Day of the Month") +
  ylab("Abnormal Temperature Count") +
  ggtitle("Occurences of Abnormal Temperature") +
  labs(fill = "") +
  theme(legend.position = "top") +
  scale_fill_manual(values = c("#c66c45", "#80dae5")) +
  # The labels use the same dodge width as the bars so they sit on top of them.
  geom_text(
    aes(label = count),
    position = position_dodge2(width = 1.2),
    size = 3
  )

dodged

ABSOLUTE STACKED BARCHART

# Share of male and female candidates per election year, as a rounded
# percentage of that year's M + F candidates. Rows with any other SEX value
# are excluded.
A2 <- election_data %>%
  filter(SEX == "M" | SEX == "F") %>%
  group_by(YEAR, SEX) %>%
  summarise(GenderCount = n()) %>%
  group_by(YEAR) %>%
  mutate(
    TotalCandidatesperYear = sum(GenderCount),
    Gender_Division = round(GenderCount / TotalCandidatesperYear * 100)
  ) %>%
  arrange(desc(SEX))

# One stacked column per year; YEAR is treated as a category on the x-axis.
A2 <- ggplot(A2, aes(as.factor(YEAR), Gender_Division, fill = SEX)) +
  geom_col() +
  xlab("YEAR") +
  ylab("Gender Split") +
  ggtitle("Yearwise Percentage of Male and Female Candidates") +
  scale_fill_manual(values = c("#db4885", "#6fdb48")) +
  labs(fill = "")

ggplotly(A2)

LINE CHART

# Mean temperature for each day of the month, rounded to two decimals.
linechart <- iot_data %>%
  group_by(day(date)) %>%
  summarise(Avg_Temp_per_Day = round(mean(temperature), digits = 2))

# Green line through the daily means, with a red marker on each day.
linechart <- ggplot(linechart, aes(`day(date)`, Avg_Temp_per_Day)) +
  geom_line(color = "mediumseagreen", size = 0.8) +
  geom_point(color = "red", size = 0.8) +
  xlab("Day of the Month") +
  ylab("Average Temperature") +
  ggtitle("Daywise Avg. Temp. for the month of November") +
  theme(legend.position = "none")

ggplotly(linechart)

HEAT MAP

# Mean temperature for every (day, hour) cell.
# summarise() yields exactly one row per tile. The previous mutate() kept one
# row per reading, so identical tiles were drawn on top of each other.
heat_map <- iot_data %>%
  mutate(day = day(date), hour = hour(date)) %>%
  group_by(day, hour) %>%
  summarise(Avg_Temp_per_day = mean(temperature)) %>%
  ungroup()

library(grDevices)
# 15-step colour ramp from cool (skyblue) to warm (brown1).
colorsdefined <- colorRampPalette(c("skyblue", "brown1"))(15)

# The mean is a continuous value, so it is mapped through a continuous
# gradient built from the ramp. Casting it to a factor and using a 15-colour
# manual scale fails when there are more than 15 distinct means and uses only
# the first few colours when there are fewer.
heat_map <- ggplot(heat_map, aes(day, hour, fill = Avg_Temp_per_day)) +
  geom_tile() +
  xlab("Day of the month") +
  ylab("Hour of the Day") +
  ggtitle("Hourly Temperature Readings for the month of November") +
  scale_fill_gradientn(colours = colorsdefined) +
  labs(fill = "Temperature") +
  theme(
    axis.text.x = element_text(angle = 30),
    axis.text.y = element_text(angle = 30)
  )

heat_map

BOX PLOT

# One box per day of the month showing the spread of that day's readings.
# group_by() is used only to materialise the `day(date)` column; the boxes
# themselves are split by the `group` aesthetic.
bplot <- iot_data %>%
  group_by(day(date)) %>%
  ggplot(aes(`day(date)`, temperature)) +
  geom_boxplot(
    aes(group = `day(date)`, fill = factor(`day(date)`))
  ) +
  xlab("Day of the Month") +
  ylab("Temperature Distribution") +
  ggtitle("Daywise Distribution of Temperature Readings") +
  theme(legend.position = "none") +
  # Put a tick on every day from 1 to 30.
  scale_x_continuous(breaks = seq(1, 30, 1))

ggplotly(bplot)

SCATTERPLOT (with 2 Quantitative and 1 Categorical Variable)

# Retweets against likes for every tweet, coloured by the posting source.
# Uses ggplot2's own `size` / `shape` names instead of the base-graphics
# aliases `cex` / `pch`. The shape is the integer code 1 (open circle): the
# previous value 1.3 is not a defined shape code and is presumed to have been
# truncated to 1 when drawn. TRUE replaces the reassignable shorthand T.
scatter <- ggplot(
  tweets_data,
  aes(retweets_count, favorite_count, color = source)
) +
  geom_point(size = 0.8, shape = 1, show.legend = TRUE) +
  xlab("Retweets") +
  ylab("Likes") +
  ggtitle("Retweets vs. Likes / Device Used") +
  theme(legend.position = "top")

scatter

ANIMATED SCATTERPLOT

#install.packages("devtools")
#devtools::install_github("dgrtwo/gganimate")
library(animation)
library(gganimate)

# Year of each match, parsed from the month-day-year MatchDate column.
odi_data$YEAR <- year(mdy(odi_data$MatchDate))
# Restrict the animation to a single player's innings.
odi_subset <- odi_data %>% filter(Player == "Sachin R Tendulkar")

#ImageMagick, or other such drivers need to be installed on your computer

#Sys.setenv(PATH = paste("C:/Program Files/ImageMagick-7.0.7-Q16/", Sys.getenv("PATH"), sep = ";"))

# Point the animation package at the ImageMagick executable (Windows path).
magickPath <- shortPathName("C:/Program Files/ImageMagick-7.0.7-Q16/magick.exe")
ani.options(convert = magickPath)

# One frame per YEAR; point size follows the strike rate.
# The colour is a fixed aesthetic on the layer. Putting a constant such as
# "Red" inside aes() maps it through the default discrete colour scale, so the
# points would not actually be drawn red.
plot <- ggplot(
  odi_subset,
  aes(Runs, ScoreRate, frame = YEAR, size = ScoreRate)
) +
  geom_point(color = "red") +
  theme(legend.position = "none") +
  ylab("Strike Rate") +
  ggtitle("Sachin's Centuries vs. Strike Rate over the years...")

# Render the animation, showing each frame for 0.7 seconds.
gganimate(plot, interval = 0.7)

TREE MAP (with 2 Categorical & 1 Quantitative Variable)

library(treemap)

# Flag every innings of 100 runs or more.
odi_data$Century <- odi_data$Runs > 99

# Centuries per (Player, Ground), plus the total number of centuries scored at
# each ground; only grounds with more than 20 centuries are kept. The result
# stays grouped by Ground.
stadiums <- odi_data %>%
  filter(Century) %>%
  group_by(Player, Ground) %>%
  summarise(`100's` = n()) %>%
  arrange(desc(`100's`)) %>%
  group_by(Ground) %>%
  mutate(Total_Centuries_in_Stadium = sum(`100's`)) %>%
  arrange(desc(Total_Centuries_in_Stadium)) %>%
  filter(Total_Centuries_in_Stadium > 20)

stadiums
## # A tibble: 400 x 4
## # Groups:   Ground [17]
##                    Player             Ground `100's`
##                    <fctr>             <fctr>   <int>
##  1     Sachin R Tendulkar Sharjah CA Stadium       7
##  2            Saeed Anwar Sharjah CA Stadium       7
##  3         Inzamam-ul-Haq Sharjah CA Stadium       4
##  4   Richard B Richardson Sharjah CA Stadium       3
##  5            Salim Malik Sharjah CA Stadium       3
##  6           Aamir Sohail Sharjah CA Stadium       2
##  7           Brian C Lara Sharjah CA Stadium       2
##  8          Carl L Hooper Sharjah CA Stadium       2
##  9 D P Mahela Jayawardene Sharjah CA Stadium       2
## 10           Gary Kirsten Sharjah CA Stadium       2
## # ... with 390 more rows, and 1 more variables:
## #   Total_Centuries_in_Stadium <int>
# Size every rectangle by the player's centuries at that ground (`100's`).
# treemap() sums vSize over the rows of each index level, so the per-player
# counts add up to the stadium total. Using Total_Centuries_in_Stadium as vSize
# repeated the stadium total on every player row: stadium areas were inflated
# by their number of players and all players in a stadium got equal boxes.
treemap(
  stadiums,
  index = c("Total_Centuries_in_Stadium", "Ground", "Player"),
  vSize = "100's",
  title = "Stadiums with most number of ODI Centuries",
  palette = "Greens"
)

Facet Wrap

# The ten players with the highest career run totals.
Top_10_Players <- odi_data %>%
  group_by(Player) %>%
  summarise(TotalRuns = sum(Runs)) %>%
  arrange(desc(TotalRuns)) %>%
  head(10)

# 1 for an innings of 100 runs or more, 0 otherwise.
odi_data[["100's"]] <- as.numeric(odi_data$Runs > 99)

# Year of each match, parsed from the month-day-year MatchDate column.
odi_data[["Year"]] <- year(mdy(odi_data$MatchDate))

# Centuries per player per year for those ten players.
Top_10_Players_data <- odi_data %>%
  filter(Player %in% Top_10_Players$Player) %>%
  group_by(Player, Year) %>%
  summarise(Centuries = sum(`100's`))

# One panel per player, laid out over five rows.
f_wrap <- ggplot(
  Top_10_Players_data,
  aes(Year, Centuries, color = Player)
) +
  geom_point() +
  geom_line() +
  facet_wrap(~Player, nrow = 5) +
  theme(legend.position = "none") +
  ggtitle("Centuries of Top 10 Run Scorers over the Years")

ggplotly(f_wrap)
# Facet grid: yearly centuries of the five highest run scorers, one row per
# player. Relies on the `Year` and `100's` columns already present on odi_data.
Top_5_Players <- odi_data %>%
  group_by(Player) %>%
  summarise(TotalRuns = sum(Runs)) %>%
  arrange(desc(TotalRuns)) %>%
  head(5)

# Centuries per player per year for those five players.
Top_5_Players_data <- odi_data %>%
  filter(Player %in% Top_5_Players$Player) %>%
  group_by(Player, Year) %>%
  summarise(Centuries = sum(`100's`))

# Jittered yearly points with a loess trend line in each player's panel.
f_grid <- ggplot(
  Top_5_Players_data,
  aes(Year, Centuries, color = Player)
) +
  geom_jitter() +
  geom_smooth(method = "loess") +
  facet_grid(Player ~ .) +
  theme(legend.position = "none") +
  ggtitle("Centuries of Top 5 Run Scorers over the Years")

ggplotly(f_grid)